# Prevent printing of warnings and such in the HTML
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(tidyverse)
library(multitaper)
library(GenomicAlignments)
library(parallel)
This is to be done using the multitaper package https://github.com/krahim/multitaper/.
bam.locs <- dir("../../alignment/hisat2/output", pattern = ".bam", recursive = TRUE, full.names = TRUE)
names(bam.locs) <- bam.locs %>%
str_split(pattern = "/") %>%
map(function(x){x[[6]]}) %>%
str_remove(".bam") %>%
str_replace("AraR", "REL60")
Read the bams in, only keeping + strand reads and excluding ERCCs and tRNAs.
bam.list <- mclapply(bam.locs, function(x){
b1 <- readGAlignments(x)
b2 <- b1[strand(b1) == "+" & grepl("ECB_", seqnames(b1))]
return(b2)
}, mc.cores = 40)
Calculate periodicity, this seems to want to return a graph no matter what.
df.list <- lapply(bam.list, function(x){
# count number of reads at each end point
t1 <- table(factor(end(x), levels = 15:150))
# apply spec function
t2 <- spec.pgram(as.ts(t1))
# get a df of graphable parts, rescale the spec height to 0-1 so it's comparable across samples
t3 <- tibble(period = 1/t2$freq,
spec = t2$spec)
# return said df
return(t3)
})
Bind all those together
df1 <- bind_rows(df.list, .id = "sample") %>%
separate(sample, into = c("rep", "seqtype", "line"), sep = "-") %>%
filter(is.finite(period) & period <= 4) # only retain a small range
df1$line <- gsub("M", "-", df1$line) %>%
gsub("P", "+", .) %>%
gsub("AraR", "REL0", .)
# save it
write_csv(df1, "../../data_frames/three_nt_periodicity.csv")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] GenomicAlignments_1.28.0 Rsamtools_2.8.0
## [3] Biostrings_2.60.0 XVector_0.32.0
## [5] SummarizedExperiment_1.22.0 Biobase_2.52.0
## [7] MatrixGenerics_1.4.0 matrixStats_0.59.0
## [9] GenomicRanges_1.44.0 GenomeInfoDb_1.28.0
## [11] IRanges_2.26.0 S4Vectors_0.30.0
## [13] BiocGenerics_0.38.0 multitaper_1.0-15
## [15] forcats_0.5.1 stringr_1.4.0
## [17] dplyr_1.0.6 purrr_0.3.4
## [19] readr_1.4.0 tidyr_1.1.3
## [21] tibble_3.1.2 ggplot2_3.3.3
## [23] tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.2 sass_0.4.0 jsonlite_1.7.2
## [4] modelr_0.1.8 bslib_0.2.5.1 assertthat_0.2.1
## [7] highr_0.9 GenomeInfoDbData_1.2.6 cellranger_1.1.0
## [10] yaml_2.2.1 pillar_1.6.1 backports_1.2.1
## [13] lattice_0.20-44 glue_1.4.2 digest_0.6.27
## [16] rvest_1.0.0 colorspace_2.0-1 htmltools_0.5.1.1
## [19] Matrix_1.3-4 pkgconfig_2.0.3 broom_0.7.6
## [22] haven_2.4.1 zlibbioc_1.38.0 scales_1.1.1
## [25] BiocParallel_1.26.0 generics_0.1.0 ellipsis_0.3.2
## [28] withr_2.4.2 cli_2.5.0 magrittr_2.0.1
## [31] crayon_1.4.1 readxl_1.3.1 evaluate_0.14
## [34] fs_1.5.0 fansi_0.5.0 xml2_1.3.2.9001
## [37] tools_4.1.0 hms_1.1.0 lifecycle_1.0.0
## [40] munsell_0.5.0 reprex_2.0.0 DelayedArray_0.18.0
## [43] compiler_4.1.0 jquerylib_0.1.4 rlang_0.4.11
## [46] grid_4.1.0 RCurl_1.98-1.3 rstudioapi_0.13
## [49] bitops_1.0-7 rmarkdown_2.8 gtable_0.3.0
## [52] DBI_1.1.1 R6_2.5.0 lubridate_1.7.10
## [55] knitr_1.33 utf8_1.2.1 stringi_1.6.2
## [58] Rcpp_1.0.6 vctrs_0.3.8 dbplyr_2.1.1
## [61] tidyselect_1.1.1 xfun_0.23